module net.BurtonRadons.spyl.lexer;

private import net.BurtonRadons.spyl.mark;
private import std.ctype;
private import std.c.stdio;
private import std.string;

/** Transforms an input text into a series of spyl tokens. */
class Lexer
{
    Mark mark; /**< Current source mark; its line is advanced as newlines are lexed. */
    char *s; /**< Current read position within the source text. */
    char *e; /**< End-of-string marker: one past the last character of the source text. */
    
    /** The kinds of token the lexer can produce. */
    enum TokenType
    {
        EOF, /**< End-of-source token. */
        
        /* Value-carrying tokens. */
        Float, /**< Float value. */
        Id, /**< An identifier. */
        Integer, /**< Integer value. */
        String, /**< String token. */
        
        /* Operators and punctuation. */
        Add, /**< "+". */
        AddAdd, /**< "++". */
        AddAssign, /**< "+=". */
        Assign, /**< "=". */
        Asterisk, /**< "*". */
        AsteriskAssign, /**< "*=". */
        AsteriskAsterisk, /**< "**". */
        AsteriskAsteriskAssign, /**< "**=". */
        BracketLeft, /**< "[". */
        BracketRight, /**< "]". */
        Comma, /**< ",". */
        CurlyBraceLeft, /**< "{". */
        CurlyBraceRight, /**< "}". */
        Dot, /**< ".". */
        DotDot, /**< "..". */
        Equals, /**< "==". */
        Minus, /**< "-". */
        MinusAssign, /**< "-=". */
        MinusMinus, /**< "--". */
        ParenthesisLeft, /**< "(". */
        ParenthesisRight, /**< ")". */
        SemiColon, /**< ";". */
        Tilde, /**< "~". */
        TildeAssign, /**< "~=". */
    }
    
    /** A single lexed token; also a node in the lookahead and free lists. */
    class Token
    {
        Mark mark; /**< Mark for the start of the token. */
        Token next; /**< Next token for lookaheads and the free list. */
        TokenType type; /**< Type of the token. */
        
        /* Payload; the members share storage, so only the one that was
           assigned when the token was created is meaningful. */
        union
        {
            char [] text; /**< Text value for an identifier. */
            long integerValue; /**< Value for an integer. */
            real floatValue; /**< Value for a float. */
        }
        
        /** Return a human-readable name for a token type, for error messages.
          *
          * Operators and punctuation are rendered wrapped in double quotes;
          * value-carrying tokens and end of file are described in words.
          * A value outside the enumeration yields "unknown token".
          */
        static char [] typeName (TokenType type)
        {
            switch (type)
            {
                case TokenType.EOF: return "end of file";
                case TokenType.Id: return "identifier";
                case TokenType.ParenthesisLeft: return '"("';
                case TokenType.ParenthesisRight: return '")"';
                case TokenType.CurlyBraceLeft: return '"{"';
                case TokenType.CurlyBraceRight: return '"}"';
                case TokenType.Comma: return '","';
                case TokenType.String: return "string";
                case TokenType.SemiColon: return '";"';
                /* Quoted like every other operator so messages read consistently. */
                case TokenType.Assign: return '"="';
                case TokenType.Equals: return '"=="';
                case TokenType.Integer: return "integer";
                case TokenType.Add: return '"+"';
                case TokenType.AddAssign: return '"+="';
                case TokenType.AddAdd: return '"++"';
                case TokenType.Minus: return '"-"';
                case TokenType.MinusMinus: return '"--"';
                case TokenType.MinusAssign: return '"-="';
                case TokenType.Asterisk: return '"*"';
                case TokenType.AsteriskAsterisk: return '"**"';
                case TokenType.AsteriskAssign: return '"*="';
                case TokenType.AsteriskAsteriskAssign: return '"**="';
                case TokenType.Float: return "float";
                case TokenType.Dot: return '"."';
                case TokenType.DotDot: return '".."';
                case TokenType.BracketLeft: return '"["';
                case TokenType.BracketRight: return '"]"';
                case TokenType.Tilde: return '"~"';
                case TokenType.TildeAssign: return '"~="';
                /* Without a default an unmatched value raises a switch error
                   instead of producing a usable message. */
                default: return "unknown token";
            }
        }
    }
    
    Token freeTokenList; /**< The start of the free token list; createToken reuses tokens from here. */
    Token currentToken; /**< The current (peeked, not yet consumed) token or null. */
    int errorCount; /**< The number of errors that have occurred; incremented by error(). */
    
    /** Set up the lexer over a source text.
      *
      * Records sourceName in the current mark and points the read cursor and
      * end marker at the beginning and end of content.
      */
    this (char [] sourceName, char [] content)
    {
        this.mark.sourceName = sourceName;
        this.s = content;
        this.e = this.s + content.length;
    }
    
    /** Print an already-formatted error message, followed by a newline, using printf. */
    void errorString (char [] string)
    {
        /* "%.*s" consumes a length and a pointer; the char[] is passed whole to supply both.
           NOTE(review): this relies on how the compiler passes a char[] through
           C varargs (length, then pointer) - confirm for the target compiler. */
        printf ("%.*s\n", string);
    }
    
    /** Format a printf-style message, report it through errorString and
      * increment errorCount.
      *
      * The formatted message must fit in the 4096-byte local buffer; the
      * formatting call itself is not bounded.
      */
    extern (C)
    void error (char [] format, ...)
    {
        char [4096] buffer;
        int length;
        
        /* The variadic arguments are taken to start directly after `format`.
           They have to be forwarded with vsprintf, which accepts a va_list;
           sprintf would treat the va_list pointer itself as the first
           argument and format garbage. */
        length = vsprintf (buffer, toStringz (format), cast (va_list) (&format + 1));
        
        /* vsprintf reports failure with a negative count; never slice with it. */
        if (length < 0)
            length = 0;
        errorString (buffer [0 .. length]);
        errorCount ++;
    }
    
    /** Consume and return the next token.
      *
      * A previously peeked token is handed out first; otherwise a new one is
      * lexed with readToken.  The returned token is immediately linked onto
      * the free list, so createToken may reuse it for a later token.
      */
    Token nextToken ()
    {
        Token result = currentToken;
        
        if (result === null)
            result = readToken ();
        else
            currentToken = result.next;
        
        /* Park the token at the head of the free list before handing it out. */
        result.next = freeTokenList;
        freeTokenList = result;
        return result;
    }
    
    /** Return the next token while leaving it in place for the following
      * nextToken or peekToken call.
      */
    Token peekToken ()
    {
        Token ahead = nextToken ();
        
        /* nextToken put the token on the free list; take it back off so it
           is not reused while it is still pending. */
        if (freeTokenList === ahead)
            freeTokenList = ahead.next;
        
        /* Push it back onto the front of the lookahead chain. */
        ahead.next = currentToken;
        currentToken = ahead;
        return ahead;
    }
    
    /** Obtain a token of the given type, stamped with the current mark.
      *
      * A token is taken from the free list when one is available; otherwise
      * a new one is allocated.
      */
    Token createToken (TokenType type)
    {
        Token fresh = freeTokenList;
        
        if (fresh !== null)
            freeTokenList = fresh.next;
        else
            fresh = new Token;
        
        fresh.mark = mark;
        fresh.type = type;
        return fresh;
    }
    
    /** Obtain a token of the given type carrying a text payload. */
    Token createToken (TokenType type, char [] text)
    {
        Token result;
        
        result = createToken (type);
        result.text = text;
        return result;
    }
    
    /** Obtain a token of the given type carrying an integer payload. */
    Token createToken (TokenType type, long value)
    {
        Token result;
        
        result = createToken (type);
        result.integerValue = value;
        return result;
    }
    
    /** Obtain a token of the given type carrying a floating-point payload. */
    Token createToken (TokenType type, real value)
    {
        Token result;
        
        result = createToken (type);
        result.floatValue = value;
        return result;
    }
    
    /** Whether ch may begin an identifier: an ASCII letter, an underscore,
      * or any byte with the high bit set (128 and above).
      *
      * Byte 128 (0x80) is included because isIdMiddle relies on this test and
      * 0x80 occurs as a trailing byte of multi-byte UTF-8 characters; leaving
      * it out would split such an identifier in the middle of a character.
      */
    bit isIdStart (char ch)
    {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' || ch >= 128;
    }
    
    /** Whether ch may continue an identifier: a decimal digit or anything
      * isIdStart accepts.
      */
    bit isIdMiddle (char ch)
    {
        if (ch >= '0' && ch <= '9')
            return true;
        return isIdStart (ch);
    }
    
    /** Consume the next source character if it equals ch.
      *
      * Returns true and advances the cursor on a match; returns false and
      * leaves the cursor alone otherwise, including at end of input.
      */
    private bit match (char ch)
    {
        if (s < e && *s == ch)
        {
            s ++;
            return true;
        }
        return false;
    }
    
    /** Lex one token straight from the source text; the internal worker
      * behind nextToken.
      *
      * Skips whitespace (counting newlines in the mark), then produces an
      * EOF, identifier, string, number or operator token.  Throws Error for
      * a character that starts no known token.
      */
    Token readToken ()
    {
        /* Skip whitespace, bumping the line counter on every newline. */
        while (s < e && isspace (*s))
        {
            if (*s == "\n")
                mark.line ++;
            s ++;
        }
        
        if (s >= e)
            return createToken (TokenType.EOF);
        
        if (isIdStart (*s))
        {
            char *start = s;
            
            s ++;
            while (s < e && isIdMiddle (*s))
                s ++;
            
            return createToken (TokenType.Id, start [0 .. cast (int) (s - start)]);
        }
        
        if (*s == "\"" || *s == "'")
            return readTokenString ();
        
        if (*s >= '0' && *s <= '9')
            return readTokenNumber ();
        
        /* Operators and punctuation: take the first character, then use
           match to extend to the longest known operator. */
        char ch = *s;
        
        s ++;
        switch (ch)
        {
            case "(": return createToken (TokenType.ParenthesisLeft);
            case ")": return createToken (TokenType.ParenthesisRight);
            case "{": return createToken (TokenType.CurlyBraceLeft);
            case "}": return createToken (TokenType.CurlyBraceRight);
            case "[": return createToken (TokenType.BracketLeft);
            case "]": return createToken (TokenType.BracketRight);
            case ",": return createToken (TokenType.Comma);
            case ";": return createToken (TokenType.SemiColon);
            
            case "=":
                if (match ("="))
                    return createToken (TokenType.Equals);
                return createToken (TokenType.Assign);
            
            case "+":
                if (match ("="))
                    return createToken (TokenType.AddAssign);
                if (match ("+"))
                    return createToken (TokenType.AddAdd);
                return createToken (TokenType.Add);
            
            case "-":
                if (match ("="))
                    return createToken (TokenType.MinusAssign);
                if (match ("-"))
                    return createToken (TokenType.MinusMinus);
                return createToken (TokenType.Minus);
            
            case "*":
                if (match ("="))
                    return createToken (TokenType.AsteriskAssign);
                if (!match ("*"))
                    return createToken (TokenType.Asterisk);
                if (match ("="))
                    return createToken (TokenType.AsteriskAsteriskAssign);
                return createToken (TokenType.AsteriskAsterisk);
            
            case ".":
                if (match ("."))
                    return createToken (TokenType.DotDot);
                return createToken (TokenType.Dot);
            
            case "~":
                if (match ("="))
                    return createToken (TokenType.TildeAssign);
                return createToken (TokenType.Tilde);
                
            default:
                /* Step back so the message shows the offending character. */
                s --;
                throw new Error ("readToken fallback '" ~ s [0 .. 1] ~ "'");
        }
    }
    
    /** Lex a numeric literal starting at the current position.
      *
      * Reads a run of digits in the current base (fixed at 10 here).  If the
      * digits are followed by a single "." the literal is a float and any
      * following digits form its fraction; a ".." is left in place so that
      * it can be lexed as a DotDot token.  Characters that are not valid
      * digits are never consumed.
      *
      * Returns an Integer token, or a Float token when a decimal point was read.
      */
    Token readTokenNumber ()
    {
        int base = 10;
        long whole = 0;
        real num = 0, den = 1;
        bit isFloat = false;
        int d;
        
        /* Value of the character at the cursor as a digit in any base up to
           16, or -1 if it is not one.  Does not move the cursor. */
        int digitValue ()
        {
            if (s >= e)
                return -1;
            if (*s >= '0' && *s <= '9')
                return *s - '0';
            if (*s >= 'a' && *s <= 'f')
                return *s - 'a' + 10;
            if (*s >= 'A' && *s <= 'F')
                return *s - 'A' + 10;
            return -1;
        }
        
        /* Consume and return the next digit valid in `base`; return -1 and
           leave the cursor untouched otherwise.  Advancing only after the
           range check keeps letters such as 'a' in "12abc" from being
           swallowed. */
        int digit ()
        {
            int value = digitValue ();
            
            if (value < 0 || value >= base)
                return -1;
            s ++;
            return value;
        }
        
        while ((d = digit ()) >= 0)
            whole = whole * base + d; 
        
        /* A lone '.' is a decimal point; the first dot of ".." is not. */
        if (s < e && *s == '.' && !(s + 1 < e && s [1] == '.'))
        {
            s ++;
            isFloat = true;
            
            while ((d = digit ()) >= 0)
            {
                num = num * base + d;
                den = den * base;
            }
        }
        
        if (isFloat)
            return createToken (TokenType.Float, cast (real) whole + num / den);
        return createToken (TokenType.Integer, whole);
    }
    
    /** Lex a double-quoted string literal starting at the current position.
      *
      * A first pass finds the closing quote, counts the resulting characters
      * and the newlines crossed, and notes whether any backslash escape is
      * present.  Without escapes the token text is a slice of the source;
      * with escapes a new buffer is filled with the decoded characters.
      *
      * Recognised escapes are \n, \r, \t, \0 and \\; a backslash before a
      * newline keeps the newline; any other escaped character (including
      * an escaped double quote) stands for itself.
      *
      * Throws Error if the string is not terminated, if it uses the
      * unimplemented \x or \u escapes, or if the literal is single-quoted.
      */
    Token readTokenString ()
    {
        Token result;
        
        if (*s == "\"")
        {
            char [] text;
            int length = 0;
            bit quickly = true;
            char *p;
            int lineAdd = 0;
            
            /* Pass 1: locate the closing quote.  Each loop step accounts for
               exactly one output character, escapes included. */
            for (p = s + 1; ; p ++, length ++)
                if (p >= e)
                    throw new Error ("String is not terminated.");
                else if (*p == "\n")
                    lineAdd ++;
                else if (*p == "\"")
                    break;
                else if (*p == "\\")
                {
                    quickly = false;
                    p ++;
                    if (p >= e)
                        continue;
                    if (*p == "\n")
                        lineAdd ++;
                    if (*p == "x" || *p == "X")
                        p += 2;
                    else if (*p == "u" || *p == "U")
                        p += 4;
                }
                
            if (quickly)
                text = s [1 .. cast (int) (p - s)];
            else
            {
                text = new char [length];
                int c = 0;
                
                /* Pass 2: decode escapes into the new buffer.  Pass 1 has
                   already established that a closing quote exists. */
                for (p = s + 1; ; p ++, c ++)
                {
                    if (*p == "\"")
                        break;
                    else if (*p == "\\")
                    {
                        p ++;
                        if (*p == "\n")
                            text [c] = *p;
                        else switch (*p)
                        {
                            case "n": text [c] = "\n"; break;
                            case "\\": text [c] = "\\"; break;
                            case "r": text [c] = "\r"; break;
                            case "t": text [c] = "\t"; break;
                            case "0": text [c] = "\0"; break;
                            case "x":
                            case "X":
                            case "u":
                            case "U":
                                throw new Error ("\\x and \\u are unimplemented"); 
                            
                            /* Pass 1 skips over any escaped character, so an
                               escaped quote or other unlisted character gets
                               here; without a default the switch would raise
                               an error instead of producing the character. */
                            default:
                                text [c] = *p;
                                break;
                        }
                    }
                    else
                        text [c] = *p;
                }
            }
            
            s = p + 1;
            result = createToken (TokenType.String, text);
            
            /* Advance the line count only after the token took its mark. */
            mark.line += lineAdd;
            return result;
        }
        else
        {
            /* readToken also dispatches "'" here; report it as an input
               error rather than tripping an assertion. */
            throw new Error ("Single-quoted strings are unimplemented");
        }
    }
    
    /** Consume the next token and check that it has the given type.
      *
      * Returns true on a match.  Otherwise reports an "Expected ... but got
      * ..." error through error() and returns false; the token is consumed
      * either way.
      */
    bit expectToken (TokenType type)
    {
        Token got = nextToken ();
        
        if (got.type != type)
        {
            error ("Expected " ~ Token.typeName (type) ~ " but got " ~ Token.typeName (got.type) ~ ".");
            return false;
        }
        return true;
    }
    
    /** Consume the next token only if it has the given type.
      *
      * Returns true when the token matched and was consumed, false when it
      * was left in place.
      */
    bit foundToken (TokenType type)
    {
        if (peekToken ().type != type)
            return false;
        
        nextToken ();
        return true;
    }
}
